The central nervous system (CNS) in vertebrates is encased by three layers of tissue that together comprise the meninges. The outermost layer of tissue is the dura mater, the arachnoid mater lies one layer deeper, and the innermost layer is the pia mater, that adheres to the brain surface. The dura layer performs important structural roles in the CNS.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Seurat)
## The legacy packages maptools, rgdal, and rgeos, underpinning this package
## will retire shortly. Please refer to R-spatial evolution reports on
## https://r-spatial.org/r/2023/05/15/evolution4.html for details.
## This package is now running under evolution status 0
## Attaching SeuratObject
library(patchwork)
library(ggplot2)

1- LOAD DATA

load("./SRA866994_SRS4545965.sparse.RData")

rownames(sm) <- gsub("\\_E.*","",rownames(sm))
rownames(sm) <- gsub("\\_","-",rownames(sm))
head(rownames(sm))
## [1] "-343C11.2"      "00R-AC107638.2" "0610009O20Rik"  "1110020A21Rik" 
## [5] "1700007L15Rik"  "1700015F17Rik"
dura <- CreateSeuratObject(counts = sm, project = "scRNA_dura", min.cells = 3, min.features = 200) #filter out all cells in which there are less than 200 genes expressed (min.features) and genes expressed in less than 3 cells (min.cells)
## Warning: Non-unique features (rownames) present in the input matrix, making
## unique

2- CELL QUALITY CONTROL

grep("^mt-",rownames(dura),value = TRUE)
##  [1] "mt-Atp6" "mt-Co1"  "mt-Co2"  "mt-Co3"  "mt-Cytb" "mt-Nd1"  "mt-Nd2" 
##  [8] "mt-Nd3"  "mt-Nd4"  "mt-Nd4l" "mt-Nd5"  "mt-Nd6"  "mt-Rnr1" "mt-Rnr2"
## [15] "mt-Tc"   "mt-Tn"   "mt-Tp"
dura[["percent.mt"]] <- PercentageFeatureSet(dura, pattern = "^mt-")

grep("^Rp[ls]",rownames(dura),value = TRUE)
##   [1] "Rpl10"       "Rpl10-ps1"   "Rpl10-ps2"   "Rpl10-ps4"   "Rpl10-ps6"  
##   [6] "Rpl10a"      "Rpl10a-ps1"  "Rpl11"       "Rpl12"       "Rpl12-ps1"  
##  [11] "Rpl13"       "Rpl13-ps1"   "Rpl13-ps6"   "Rpl13a"      "Rpl13a-ps1" 
##  [16] "Rpl14"       "Rpl14-ps1"   "Rpl15"       "Rpl15-ps2"   "Rpl15-ps3"  
##  [21] "Rpl17"       "Rpl17-ps1"   "Rpl17-ps10"  "Rpl17-ps3"   "Rpl17-ps4"  
##  [26] "Rpl17-ps5"   "Rpl17-ps8"   "Rpl17-ps9"   "Rpl18"       "Rpl18-ps1"  
##  [31] "Rpl18-ps2"   "Rpl18a"      "Rpl18a-ps1"  "Rpl19"       "Rpl19-ps1"  
##  [36] "Rpl19-ps11"  "Rpl19-ps12"  "Rpl21"       "Rpl21-ps1"   "Rpl21-ps10" 
##  [41] "Rpl21-ps11"  "Rpl21-ps15"  "Rpl21-ps3"   "Rpl21-ps5"   "Rpl21-ps6"  
##  [46] "Rpl21-ps7"   "Rpl21-ps8"   "Rpl22"       "Rpl22-ps1"   "Rpl22l1"    
##  [51] "Rpl23"       "Rpl23a"      "Rpl23a-ps1"  "Rpl23a-ps14" "Rpl23a-ps2" 
##  [56] "Rpl23a-ps3"  "Rpl23a-ps5"  "Rpl24"       "Rpl26"       "Rpl27"      
##  [61] "Rpl27-ps1"   "Rpl27-ps2"   "Rpl27-ps3"   "Rpl27a"      "Rpl27a-ps1" 
##  [66] "Rpl27a-ps2"  "Rpl28"       "Rpl28-ps1"   "Rpl28-ps3"   "Rpl29"      
##  [71] "Rpl29-ps5"   "Rpl3"        "Rpl3-ps1"    "Rpl3-ps2"    "Rpl30"      
##  [76] "Rpl30-ps1"   "Rpl30-ps10"  "Rpl30-ps11"  "Rpl30-ps3"   "Rpl30-ps9"  
##  [81] "Rpl31"       "Rpl31-ps1"   "Rpl31-ps10"  "Rpl31-ps11"  "Rpl31-ps13" 
##  [86] "Rpl31-ps14"  "Rpl31-ps15"  "Rpl31-ps16"  "Rpl31-ps17"  "Rpl31-ps20" 
##  [91] "Rpl31-ps6"   "Rpl31-ps8"   "Rpl31-ps9"   "Rpl32"       "Rpl32-ps"   
##  [96] "Rpl34"       "Rpl34-ps1"   "Rpl34-ps2"   "Rpl35"       "Rpl35a"     
## [101] "Rpl35a-ps2"  "Rpl35a-ps3"  "Rpl35a-ps4"  "Rpl35a-ps5"  "Rpl35a-ps6" 
## [106] "Rpl35a-ps7"  "Rpl36"       "Rpl36-ps10"  "Rpl36-ps2"   "Rpl36-ps3"  
## [111] "Rpl36-ps9"   "Rpl36a"      "Rpl36a-ps2"  "Rpl36a-ps3"  "Rpl36a-ps5" 
## [116] "Rpl36al"     "Rpl37"       "Rpl37a"      "Rpl37rt"     "Rpl38"      
## [121] "Rpl38-ps1"   "Rpl38-ps2"   "Rpl39"       "Rpl39-ps"    "Rpl39l"     
## [126] "Rpl3l"       "Rpl4"        "Rpl41"       "Rpl5"        "Rpl5-ps1"   
## [131] "Rpl5-ps2"    "Rpl6"        "Rpl7"        "Rpl7-ps7"    "Rpl7a"      
## [136] "Rpl7a-ps10"  "Rpl7a-ps11"  "Rpl7a-ps12"  "Rpl7a-ps3"   "Rpl7a-ps5"  
## [141] "Rpl7a-ps7"   "Rpl7a-ps8"   "Rpl7l1"      "Rpl8"        "Rpl9"       
## [146] "Rpl9-ps1"    "Rpl9-ps4"    "Rpl9-ps6"    "Rpl9-ps7"    "Rplp0"      
## [151] "Rplp1"       "Rplp1-ps1"   "Rplp2"       "Rps10"       "Rps10-ps1"  
## [156] "Rps10-ps2"   "Rps10-ps4"   "Rps11"       "Rps11-ps1"   "Rps11-ps2"  
## [161] "Rps11-ps3"   "Rps11-ps4"   "Rps12"       "Rps12-ps1"   "Rps12-ps10" 
## [166] "Rps12-ps19"  "Rps12-ps20"  "Rps12-ps23"  "Rps12-ps24"  "Rps12-ps3"  
## [171] "Rps12-ps5"   "Rps12-ps9"   "Rps13"       "Rps13-ps1"   "Rps13-ps2"  
## [176] "Rps13-ps4"   "Rps13-ps5"   "Rps14"       "Rps15"       "Rps15-ps2"  
## [181] "Rps15a"      "Rps15a-ps1"  "Rps15a-ps2"  "Rps15a-ps3"  "Rps15a-ps4" 
## [186] "Rps15a-ps5"  "Rps15a-ps6"  "Rps15a-ps7"  "Rps15a-ps8"  "Rps16"      
## [191] "Rps16-ps2"   "Rps17"       "Rps18"       "Rps18-ps1"   "Rps19"      
## [196] "Rps19-ps1"   "Rps19-ps12"  "Rps19-ps2"   "Rps19-ps3"   "Rps19-ps4"  
## [201] "Rps19-ps5"   "Rps19-ps6"   "Rps19-ps7"   "Rps19-ps8"   "Rps19-ps9"  
## [206] "Rps19bp1"    "Rps2"        "Rps2-ps10"   "Rps2-ps11"   "Rps2-ps13"  
## [211] "Rps2-ps2"    "Rps2-ps4"    "Rps2-ps5"    "Rps2-ps9"    "Rps20"      
## [216] "Rps21"       "Rps23"       "Rps23-ps1"   "Rps23-ps2"   "Rps24"      
## [221] "Rps24-ps2"   "Rps24-ps3"   "Rps25"       "Rps25-ps1"   "Rps26"      
## [226] "Rps26-ps1"   "Rps27"       "Rps27-ps1"   "Rps27a"      "Rps27a-ps1" 
## [231] "Rps27a-ps2"  "Rps27l"      "Rps27rt"     "Rps28"       "Rps29"      
## [236] "Rps3"        "Rps3a1"      "Rps3a2"      "Rps3a3"      "Rps4l"      
## [241] "Rps4x"       "Rps4x-ps"    "Rps5"        "Rps6"        "Rps6-ps4"   
## [246] "Rps6ka1"     "Rps6ka2"     "Rps6ka3"     "Rps6ka4"     "Rps6ka5"    
## [251] "Rps6kb1"     "Rps6kb2"     "Rps6kc1"     "Rps6kl1"     "Rps7"       
## [256] "Rps8"        "Rps8-ps2"    "Rps8-ps4"    "Rps9"        "Rpsa"       
## [261] "Rpsa-ps1"    "Rpsa-ps10"   "Rpsa-ps11"   "Rpsa-ps12"   "Rpsa-ps4"   
## [266] "Rpsa-ps5"    "Rpsa-ps9"
dura[["percent.rbp"]] <- PercentageFeatureSet(dura, pattern = "^Rp[ls]")

head(dura@meta.data, 5) #nCount_RNA is actually the column sum (number of reads for each cells, library size). nFeature_RNA is how many genes are found to be transcribed. rownames in this commands are the cells. percent of rbp means percantage of ribosomal protein gene which is not ribosomal RNA. These are protein coding genes coming from captured by polyA but they are the ones eat up a lot of reads.
##                  orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
## AAACCTGAGAGTCTGG scRNA_dura       2977          690 0.03359086    3.123950
## AAACCTGAGCTGCAAG scRNA_dura       5313         1647 0.92226614   31.752306
## AAACCTGAGCTGTCTA scRNA_dura       5006         1633 0.69916101   14.822213
## AAACCTGAGTTCCACA scRNA_dura       6778         1104 0.14753615    4.027737
## AAACCTGAGTTTAGGA scRNA_dura       3818         1567 0.96909377   17.679413

pre-processing:

pre_qc <- VlnPlot(dura, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rbp"), ncol = 4, pt.size=0, cols = "darkblue")
pre_qc

plot1 <- FeatureScatter(dura, feature1 = "nCount_RNA", feature2 = "percent.mt", cols = "olivedrab3")
plot2 <- FeatureScatter(dura, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = "gold2")
plot3 <- FeatureScatter(dura, feature1 = "nCount_RNA", feature2 = "percent.rbp", cols = "slateblue2")

plot1

plot2

plot3

Let’s see if there is a correlation. with nCount_RNA (how many reads per cells) vs percent.mt graph,if it is negative there is no correlation. for example, instead, in nCount_RNA vs nFeature_RNA there is a very strong correlation between how many reads you have per cells and how many genes you found transcribing in each cells. The general idea, the more reads you have given cells , the more genes transcribed. but keeping mind when there are too many (outliers) is not because it is a good cell but because it is a doublet. nCount_RNA vs percent.rbp graph, if no correlation it shows that if you have a lot of reads, it is not that are going to eaten in way only by rbp which is a good point. Generally cut is done when the violin plot becomes linear line.

as_tibble(
  dura[[]],
  rownames="Cell.Barcode"
) -> qc.metrics

head(qc.metrics)
## # A tibble: 6 × 6
##   Cell.Barcode     orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
##   <chr>            <fct>           <dbl>        <int>      <dbl>       <dbl>
## 1 AAACCTGAGAGTCTGG scRNA_dura       2977          690     0.0336        3.12
## 2 AAACCTGAGCTGCAAG scRNA_dura       5313         1647     0.922        31.8 
## 3 AAACCTGAGCTGTCTA scRNA_dura       5006         1633     0.699        14.8 
## 4 AAACCTGAGTTCCACA scRNA_dura       6778         1104     0.148         4.03
## 5 AAACCTGAGTTTAGGA scRNA_dura       3818         1567     0.969        17.7 
## 6 AAACCTGCACCTCGGA scRNA_dura      10990         3184     0.437        19.1

percent mt

qc.metrics %>%
  arrange(percent.mt) %>%
  ggplot(aes(nCount_RNA,nFeature_RNA,colour=percent.mt)) + 
  geom_point() + 
  scale_color_gradientn(colors=c("black","blue","green2","red","yellow")) +
  ggtitle("Example of plotting QC Metrics - Coloured by %MT") +
  geom_hline(yintercept = 200) +
  geom_hline(yintercept = 4000) 

percent rbp

qc.metrics %>%
  arrange(percent.rbp) %>%
  ggplot(aes(nCount_RNA,nFeature_RNA,colour=percent.rbp)) + 
  geom_point() + 
  scale_color_gradientn(colors=c("black","blue","green2","red","yellow")) +
  ggtitle("Example of plotting QC Metrics - Coloured by %Rbp") +
  geom_hline(yintercept = 200) +
  geom_hline(yintercept = 4000) 

So, this is the choice for my dataset.

dura <- subset(dura, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 5)
head(dura)
##                  orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
## AAACCTGAGAGTCTGG scRNA_dura       2977          690 0.03359086    3.123950
## AAACCTGAGCTGCAAG scRNA_dura       5313         1647 0.92226614   31.752306
## AAACCTGAGCTGTCTA scRNA_dura       5006         1633 0.69916101   14.822213
## AAACCTGAGTTCCACA scRNA_dura       6778         1104 0.14753615    4.027737
## AAACCTGAGTTTAGGA scRNA_dura       3818         1567 0.96909377   17.679413
## AAACCTGCACCTCGGA scRNA_dura      10990         3184 0.43676069   19.135578
## AAACCTGCAGGTGCCT scRNA_dura       5300         1702 0.32075472   10.641509
## AAACCTGGTGACTCAT scRNA_dura       1772          904 1.80586907   26.185102
## AAACCTGGTGGCCCTA scRNA_dura       2163          709 0.09246417    3.606103
## AAACCTGTCACTGGGC scRNA_dura       1635          526 0.12232416    5.321101

After processing:

after_qc <- VlnPlot(dura, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rbp"), ncol = 4, pt.size=0, cols = "darkblue")
after_qc

3- NORMALIZATION + Cell Cycle Scoring

dura <- NormalizeData(dura, normalization.method = "LogNormalize", scale.factor = 10000) #scale your original count in scale factor, count per 10000 reads and then compute the log.
cc.genes.updated.2019
## $s.genes
##  [1] "MCM5"     "PCNA"     "TYMS"     "FEN1"     "MCM7"     "MCM4"    
##  [7] "RRM1"     "UNG"      "GINS2"    "MCM6"     "CDCA7"    "DTL"     
## [13] "PRIM1"    "UHRF1"    "CENPU"    "HELLS"    "RFC2"     "POLR1B"  
## [19] "NASP"     "RAD51AP1" "GMNN"     "WDR76"    "SLBP"     "CCNE2"   
## [25] "UBR7"     "POLD3"    "MSH2"     "ATAD2"    "RAD51"    "RRM2"    
## [31] "CDC45"    "CDC6"     "EXO1"     "TIPIN"    "DSCC1"    "BLM"     
## [37] "CASP8AP2" "USP1"     "CLSPN"    "POLA1"    "CHAF1B"   "MRPL36"  
## [43] "E2F8"    
## 
## $g2m.genes
##  [1] "HMGB2"   "CDK1"    "NUSAP1"  "UBE2C"   "BIRC5"   "TPX2"    "TOP2A"  
##  [8] "NDC80"   "CKS2"    "NUF2"    "CKS1B"   "MKI67"   "TMPO"    "CENPF"  
## [15] "TACC3"   "PIMREG"  "SMC4"    "CCNB2"   "CKAP2L"  "CKAP2"   "AURKB"  
## [22] "BUB1"    "KIF11"   "ANP32E"  "TUBB4B"  "GTSE1"   "KIF20B"  "HJURP"  
## [29] "CDCA3"   "JPT1"    "CDC20"   "TTK"     "CDC25C"  "KIF2C"   "RANGAP1"
## [36] "NCAPD2"  "DLGAP5"  "CDCA2"   "CDCA8"   "ECT2"    "KIF23"   "HMMR"   
## [43] "AURKA"   "PSRC1"   "ANLN"    "LBR"     "CKAP5"   "CENPE"   "CTCF"   
## [50] "NEK2"    "G2E3"    "GAS2L3"  "CBX5"    "CENPA"
convertHumanGeneList <- function(x){
  require("biomaRt")
  human = useMart(biomart="ensembl", dataset = "hsapiens_gene_ensembl", verbose = TRUE, host = "https://dec2021.archive.ensembl.org")
  mouse = useMart(biomart="ensembl", dataset = "mmusculus_gene_ensembl", verbose = TRUE, host = "https://dec2021.archive.ensembl.org")
  genes = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T)
  
  humanx <- unique(genes[, 2])
  return(humanx)
}
m.s.genes <- convertHumanGeneList(cc.genes.updated.2019$s.genes)
## Loading required package: biomaRt
## BioMartServer running BioMart version: 0.7
## Mart virtual schema: default
## Mart host: https://dec2021.archive.ensembl.org:443/biomart/martservice
## Checking attributes ...
## Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=attributes&dataset=hsapiens_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
## Checking filters ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=filters&dataset=hsapiens_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
## BioMartServer running BioMart version: 0.7
## Mart virtual schema: default
## Mart host: https://dec2021.archive.ensembl.org:443/biomart/martservice
## Checking attributes ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=attributes&dataset=mmusculus_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
## Checking filters ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=filters&dataset=mmusculus_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
m.g2m.genes <- convertHumanGeneList(cc.genes.updated.2019$g2m.genes)
## BioMartServer running BioMart version: 0.7
## Mart virtual schema: default
## Mart host: https://dec2021.archive.ensembl.org:443/biomart/martservice
## Checking attributes ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=attributes&dataset=hsapiens_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
## Checking filters ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=filters&dataset=hsapiens_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
## BioMartServer running BioMart version: 0.7
## Mart virtual schema: default
## Mart host: https://dec2021.archive.ensembl.org:443/biomart/martservice
## Checking attributes ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=attributes&dataset=mmusculus_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
## Checking filters ...Attempting web service request:
## https://dec2021.archive.ensembl.org:443/biomart/martservice?type=filters&dataset=mmusculus_gene_ensembl&requestid=biomaRt&mart=ENSEMBL_MART_ENSEMBL&virtualSchema=default
##  ok
CellCycleScoring(dura, s.features = m.s.genes, g2m.features = m.g2m.genes, set.ident = TRUE) -> dura
## Warning: The following features are not present in the object: Uhrf1, Fen1,
## Ubr7, Usp1, Ung, not searching for symbol synonyms
## Warning: The following features are not present in the object: Ube2c, Pimreg,
## Jpt1, not searching for symbol synonyms
head(dura[[]])
##                  orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
## AAACCTGAGAGTCTGG scRNA_dura       2977          690 0.03359086    3.123950
## AAACCTGAGCTGCAAG scRNA_dura       5313         1647 0.92226614   31.752306
## AAACCTGAGCTGTCTA scRNA_dura       5006         1633 0.69916101   14.822213
## AAACCTGAGTTCCACA scRNA_dura       6778         1104 0.14753615    4.027737
## AAACCTGAGTTTAGGA scRNA_dura       3818         1567 0.96909377   17.679413
## AAACCTGCACCTCGGA scRNA_dura      10990         3184 0.43676069   19.135578
##                       S.Score     G2M.Score Phase  old.ident
## AAACCTGAGAGTCTGG -0.006493485 -0.0052296526    G1 scRNA_dura
## AAACCTGAGCTGCAAG  0.047649640 -0.1198090630     S scRNA_dura
## AAACCTGAGCTGTCTA -0.065611963 -0.1307623862    G1 scRNA_dura
## AAACCTGAGTTCCACA  0.022383218  0.0007844138     S scRNA_dura
## AAACCTGAGTTTAGGA -0.077387423 -0.0700787659    G1 scRNA_dura
## AAACCTGCACCTCGGA  0.413915626  0.0651274090     S scRNA_dura

HIGHLY VARIABLE GENES

dura <- FindVariableFeatures(dura, selection.method = "vst", nfeatures = 2000) #now after that the analysis will be proceed with 2000 most significant (variable) genes.
top10 <- head(VariableFeatures(dura), 10)
plot_vf <- VariableFeaturePlot(dura)
plot_vf2 <- LabelPoints(plot = plot_vf, points = top10, repel = TRUE, xnudge = 0, ynudge =0)
plot_vf2
## Warning: Transformation introduced infinite values in continuous x-axis

SCALING

all.genes <- rownames(dura)
dura <- ScaleData(dura, features = all.genes)
## Centering and scaling data matrix

(regression should be done here if it is needed)

DIMENTIONAL REDUCTION

In first 10-15 (generally) depends on the biological variability but after that variability comes from techinal issues. In the elbow plot you can see the first point shows the first component which captures most of the variability. At some point, elbow plot reach a plataeu. After this plataeu, you have still variability but they comes from technical noise. You have to choose best border to cut. generally, standard deviation lower than 2, cut them.

dura <- RunPCA(dura, features = VariableFeatures(object = dura))
## PC_ 1 
## Positive:  Csf1r, C1qa, C1qb, C1qc, Aif1, Lgmn, Apoe, Ms4a7, Tmem176b, Ctsb 
##     Fcrls, Trf, Rgs10, Ctss, Tmem176a, Cd68, Fcgrt, Dab2, Pf4, Ctsc 
##     Adgre1, Cst3, Trem2, Mrc1, Fcgr1, P2ry6, Grn, Hpgd, Cx3cr1, Rnase4 
## Negative:  Cd24a, Wfdc21, Pglyrp1, Lcn2, Hmgb2, Serpinb1a, Ngp, Camp, Ly6g, Ltf 
##     Anxa1, Cd177, Cebpe, RP23-458B6.6, Trem3, Chil1, Cdkn2d, Hmgn2, Tuba4a, Ebf1 
##     Slpi, Hp, Cd79a, Vpreb3, Lrg1, Mgst2, Retnlg, Adpgk, Mmp9, RP23-101F14.3 
## PC_ 2 
## Positive:  Retnlg, Mmp9, Mmp8, Slpi, Pglyrp1, Wfdc21, Ly6g, S100a6, Ifitm6, Cxcr2 
##     Mcemp1, Lcn2, Fpr2, Lrg1, Hdc, Hp, Mxd1, Clec4d, Adam8, Ceacam10 
##     Chil1, Slfn1, Il1r2, Gsr, Fpr1, Padi4, RP23-101F14.3, Anxa1, Mrgpra2b, Clec4e 
## Negative:  Stmn1, Pclaf, Top2a, Rrm2, Ccna2, Spc24, Birc5, Cdca3, Tuba1b, Cks1b 
##     Cdk1, Cdca8, Nusap1, Pbk, H2afx, Asf1b, Tubb5, Ccnb2, Fbxo5, Smc2 
##     Tpx2, Lockd, Kif22, Aurkb, Hist1h2ao, Mki67, Spc25, Ptma, Neil3, Tacc3 
## PC_ 3 
## Positive:  Ly6d, Ighm, Cd79a, Cd79b, Iglc3, Ms4a1, Ebf1, Igkc, Spib, Cd2 
##     Gimap6, Tnfrsf13c, Vpreb3, Dusp2, Iglc1, Fcrla, Fcmr, Chchd10, Ptma, Iglc2 
##     H2-Q7, H2-DMb2, Ccnd2, Dnajc7, S100a10, Irf8, Pafah1b3, RP23-52N2.1, Gimap4, Rnase6 
## Negative:  Hp, Lcn2, Ifitm6, Anxa1, Wfdc21, Mcemp1, Gsr, Pglyrp1, RP23-458B6.6, Ifitm3 
##     Cd177, Trem3, Lrg1, Ngp, Mgst1, Ly6g, Slpi, Chil3, Lgals3, Ltf 
##     Camp, Chil1, Hmgb2, Mmp9, Mmp8, Cebpe, Retnlg, Glrx, RP23-101F14.3, Ckap4 
## PC_ 4 
## Positive:  Cd79b, Cd79a, Ly6d, Ebf1, Ighm, Vpreb3, Ms4a1, Igkc, Iglc3, Spib 
##     Cd24a, Tnfrsf13c, Chchd10, Fcrla, Iglc1, Fcmr, Iglc2, Pafah1b3, Rnase6, H2-DMb2 
##     Pou2af1, Ifi30, Mzb1, Dnajc7, Fam129c, RP23-435I4.11, RP23-106I21.6, Cnn3, Id3, RP23-272P19.7 
## Negative:  Nkg7, Ms4a4b, Ctsw, Ccl5, Il2rb, Gimap4, RP23-52N2.1, Lck, Klre1, Klrk1 
##     Klrb1c, Id2, Ncr1, Skap1, Trbc1, Klrd1, Gzma, Prf1, Gimap3, Trbc2 
##     Thy1, Ctla2a, Xcl1, Klra13-ps, Sh2d1a, Samd3, Gzmb, RP24-472D23.1, Fasl, Cd3g 
## PC_ 5 
## Positive:  Gas6, Pf4, Cbr2, Igf1, Ltc4s, Stab1, Fcrls, Cd63, Blvrb, Gypa 
##     Hba-a2, Maf, Mrc1, Slc4a1, Hba-a1, Hbb-bt, Rhd, Igfbp4, Cldn13, Car2 
##     Snca, Pmp22, C1qc, Dab2, C1qa, Cd63-ps, Rab3il1, Trim10, Hemgn, Epor 
## Negative:  S100a4, Ccr2, Lgals3, Ms4a4c, Crip1, Vim, Plac8, Plbd1, Mcub, Naaa 
##     Gm2a, Fn1, Psap, Sirpb1c, Ccl9, Smpdl3a, Tifab, Emb, Itgb7, RP24-150C17.2 
##     Mpeg1, Ms4a6c, RP24-80F7.5, S100a6, Ahnak, Dbi, AC092855.1, H2afy, Klf4, Olfm1
print(dura[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  Csf1r, C1qa, C1qb, C1qc, Aif1 
## Negative:  Cd24a, Wfdc21, Pglyrp1, Lcn2, Hmgb2 
## PC_ 2 
## Positive:  Retnlg, Mmp9, Mmp8, Slpi, Pglyrp1 
## Negative:  Stmn1, Pclaf, Top2a, Rrm2, Ccna2 
## PC_ 3 
## Positive:  Ly6d, Ighm, Cd79a, Cd79b, Iglc3 
## Negative:  Hp, Lcn2, Ifitm6, Anxa1, Wfdc21 
## PC_ 4 
## Positive:  Cd79b, Cd79a, Ly6d, Ebf1, Ighm 
## Negative:  Nkg7, Ms4a4b, Ctsw, Ccl5, Il2rb 
## PC_ 5 
## Positive:  Gas6, Pf4, Cbr2, Igf1, Ltc4s 
## Negative:  S100a4, Ccr2, Lgals3, Ms4a4c, Crip1
DimPlot(dura, reduction = "pca", group.by= "Phase", pt.size = 0.9, cols = c("#F0A0FF","#8F7C00","#9DCC00")) + 
  labs(x = "PC 1",
       y = "PC 2")

when running a PCA on cell cycle genes, actively proliferating cells remain distinct from G1 cells however, within actively proliferating cells, G2M and S phase cells group together. I am not removing cell cycle effect

———— NOTE: If cells are known to be differentiating and there is clear clustering differences between G2M and S phases, then you may want to regress out by the difference between the G2M and S phase scores, thereby still differentiating the cycling from the non-cycling cells.

Some procedure removes all signal associated with cell cycle. In some cases, this can negatively impact downstream analysis, particularly in differentiating processes (like murine hematopoiesis), where stem cells are quiescent and differentiated cells are proliferating (or vice versa). In this case, regressing out all cell cycle effects can blur the distinction between stem and progenitor cells as well.

As an alternative, it is suggested that regressing out the difference between the G2M and S phase scores. This means that signals separating non-cycling cells and cycling cells will be maintained, but differences in cell cycle phase amongst proliferating cells (which are often uninteresting), will be regressed out of the data. But in my data, G2M and S phase is already groupped together. So there is no need for regression of cell cycle effect.

ElbowPlot(dura, ndims=40)

———– Clustering: PCA:26, Resolution: 0.3 ————-

dura26 <- FindNeighbors(dura, dims = 1:26)
## Computing nearest neighbor graph
## Computing SNN
dura26_03 <- FindClusters(dura26, resolution = 0.3) #resolution parameter means that the higher is the resolution value, the higher number of the clusters. So if you are not satisfied by too few clusters you increased the resolution.If you have too many small clusters you reduce the resolution.
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5007
## Number of edges: 172528
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9504
## Number of communities: 18
## Elapsed time: 0 seconds
DimPlot(dura26_03, reduction = "pca", pt.size = 0.7, label = TRUE)

table(Idents(dura26_03))
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 927 681 516 503 463 375 237 217 196 180 160 145  97  97  94  57  38  24
# Calculate number of cells per cluster from object@ident
cell.num <- table(dura26_03@active.ident)

# Add cell number per cluster to cluster labels
ClusterLabels = paste(names(cell.num), paste0("(n = ", cell.num, ")"))

# Order legend labels in plot in the same order as 'ClusterLabels'
ClusterBreaks = names(cell.num)

# Plot tSNE with new legend labels for clusters
dura26_03 <- RunTSNE(dura26_03, dims=1:26)
DimPlot(dura26_03, reduction = "tsne", pt.size = 0.7, label = T) +
  scale_colour_discrete(breaks = ClusterBreaks, 
                        labels = ClusterLabels) +
  labs(x = "t-SNE 1",
       y = "t-SNE 2")

———– Clustering: PCA:36, Resolution: 0.5 ————-

dura36 <- FindNeighbors(dura, dims = 1:36)
## Computing nearest neighbor graph
## Computing SNN
dura36_05 <- FindClusters(dura36, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5007
## Number of edges: 182685
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9307
## Number of communities: 19
## Elapsed time: 0 seconds
# Calculate number of cells per cluster from object@ident
cell.num_36 <- table(dura36_05@active.ident)

# Add cell number per cluster to cluster labels
ClusterLabels_36 = paste(names(cell.num_36), paste0("(n = ", cell.num_36, ")"))

# Order legend labels in plot in the same order as 'ClusterLabels'
ClusterBreaks_36 = names(cell.num_36)

# Plot tSNE with new legend labels for clusters
dura36_05 <- RunTSNE(dura36_05, dims=1:36)
DimPlot(dura36_05, reduction = "tsne", pt.size = 0.7, label = T) +
  scale_colour_discrete(breaks = ClusterBreaks_36, 
                        labels = ClusterLabels_36) +
  labs(x = "t-SNE 1",
       y = "t-SNE 2")

We can continue with PCA:26, Resolution:0.3

head(dura26_03[[]],5)
##                  orig.ident nCount_RNA nFeature_RNA percent.mt percent.rbp
## AAACCTGAGAGTCTGG scRNA_dura       2977          690 0.03359086    3.123950
## AAACCTGAGCTGCAAG scRNA_dura       5313         1647 0.92226614   31.752306
## AAACCTGAGCTGTCTA scRNA_dura       5006         1633 0.69916101   14.822213
## AAACCTGAGTTCCACA scRNA_dura       6778         1104 0.14753615    4.027737
## AAACCTGAGTTTAGGA scRNA_dura       3818         1567 0.96909377   17.679413
##                       S.Score     G2M.Score Phase  old.ident RNA_snn_res.0.3
## AAACCTGAGAGTCTGG -0.006493485 -0.0052296526    G1 scRNA_dura               1
## AAACCTGAGCTGCAAG  0.047649640 -0.1198090630     S scRNA_dura               3
## AAACCTGAGCTGTCTA -0.065611963 -0.1307623862    G1 scRNA_dura               3
## AAACCTGAGTTCCACA  0.022383218  0.0007844138     S scRNA_dura               5
## AAACCTGAGTTTAGGA -0.077387423 -0.0700787659    G1 scRNA_dura               4
##                  seurat_clusters
## AAACCTGAGAGTCTGG               1
## AAACCTGAGCTGCAAG               3
## AAACCTGAGCTGTCTA               3
## AAACCTGAGTTCCACA               5
## AAACCTGAGTTTAGGA               4
VlnPlot(dura26_03,features="nCount_RNA")

VlnPlot(dura26_03,features="nFeature_RNA")

VlnPlot(dura26_03,features="percent.mt")

VlnPlot(dura26_03,features="percent.rbp")

Cell Cycle per Cluster

library(ggplot2)
dura26_03@meta.data %>%
  group_by(seurat_clusters,Phase) %>%
  count() %>%
  group_by(seurat_clusters) %>%
  mutate(percent=100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=seurat_clusters,y=percent, fill=Phase)) +
  geom_col() +
  ggtitle("Percentage of Cell Cycle Phases per Cluster")

Find Marker Genes:

dura26_03.markers <- FindAllMarkers(dura26_03, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
dura26_03.markers %>%
    group_by(cluster) %>%
    slice_max(n = 5, order_by = avg_log2FC)
## # A tibble: 90 × 7
## # Groups:   cluster [18]
##    p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
##    <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>  
##  1     0       4.68 0.969 0.086         0 0       Ms4a1  
##  2     0       4.49 0.975 0.683         0 0       Igkc   
##  3     0       4.09 0.798 0.124         0 0       Iglc2  
##  4     0       4.03 0.838 0.121         0 0       Iglc1  
##  5     0       3.79 0.992 0.254         0 0       Cd79b  
##  6     0       5.03 0.972 0.508         0 1       Retnlg 
##  7     0       3.90 0.959 0.271         0 1       Slpi   
##  8     0       3.23 0.991 0.534         0 1       Msrb1  
##  9     0       3.15 0.79  0.177         0 1       Mmp8   
## 10     0       3.12 0.997 0.772         0 1       S100a11
## # ℹ 80 more rows

Heatmap

dura26_03.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(dura26_03, features = top10$gene) + NoLegend()

d26_cluster0.markers <- FindMarkers(dura26_03, ident.1 = 0, min.pct = 0.25, test.use = "wilcox")
d26_cluster0.markers <- d26_cluster0.markers[order(-d26_cluster0.markers$avg_log2FC),]
head(d26_cluster0.markers, n=10)
##           p_val avg_log2FC pct.1 pct.2 p_val_adj
## Ms4a1         0   4.678835 0.969 0.086         0
## Igkc          0   4.489230 0.975 0.683         0
## Iglc2         0   4.085793 0.798 0.124         0
## Iglc1         0   4.030137 0.838 0.121         0
## Cd79b         0   3.793197 0.992 0.254         0
## Ly6d          0   3.786070 0.999 0.235         0
## Cd79a         0   3.681037 0.991 0.125         0
## Iglc3         0   3.413229 0.937 0.123         0
## Ighm          0   3.350475 0.999 0.622         0
## Tnfrsf13c     0   3.295215 0.841 0.089         0

Ms4a1, Cd79b, Cd79a are marker genes for B cells. Cluster 0 is B cell.

Lets move to cluster 1. which is in close proximity with cluster 5.

d26_cluster1.markers <- FindMarkers(dura26_03, ident.1 = 1, min.pct = 0.25, test.use = "wilcox")
d26_cluster1.markers <- d26_cluster1.markers[order(-d26_cluster1.markers$avg_log2FC),]
head(d26_cluster1.markers, n=10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Retnlg   0.000000e+00   5.027362 0.972 0.508  0.000000e+00
## Slpi     0.000000e+00   3.902600 0.959 0.271  0.000000e+00
## Msrb1    0.000000e+00   3.234855 0.991 0.534  0.000000e+00
## Mmp8     0.000000e+00   3.145236 0.790 0.177  0.000000e+00
## S100a11  0.000000e+00   3.119339 0.997 0.772  0.000000e+00
## S100a6   0.000000e+00   3.116517 0.977 0.573  0.000000e+00
## Mmp9     0.000000e+00   2.977081 0.866 0.154  0.000000e+00
## Ifitm1  4.030952e-132   2.910784 0.420 0.095 7.965966e-128
## S100a8  3.936682e-284   2.718955 1.000 0.976 7.779672e-280
## Il1b    2.913717e-106   2.690908 0.392 0.105 5.758087e-102
d26_cluster5.markers <- FindMarkers(dura26_03, ident.1 = 5, min.pct = 0.25, test.use = "wilcox")
d26_cluster5.markers <- d26_cluster5.markers[order(-d26_cluster5.markers$avg_log2FC),]
head(d26_cluster5.markers, n=10)
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## Ltf    8.768350e-290   3.853960 0.952 0.233 1.732801e-285
## Camp   8.629626e-188   3.815547 0.989 0.722 1.705387e-183
## Ngp    2.200414e-193   3.713114 0.989 0.683 4.348458e-189
## Lcn2   2.549926e-214   3.206883 0.995 0.481 5.039164e-210
## Ifitm6 6.079464e-210   2.915948 0.976 0.423 1.201424e-205
## Anxa1  3.676806e-214   2.744768 0.979 0.434 7.266104e-210
## Wfdc21 1.499142e-212   2.729519 0.984 0.390 2.962604e-208
## S100a9 1.064650e-177   2.538832 1.000 0.970 2.103962e-173
## S100a8 8.192064e-175   2.520061 1.000 0.977 1.618916e-170
## Chil3  5.181034e-198   2.496536 0.952 0.331 1.023876e-193

Camp is marker gene of Neurophils.

d26_cluster1and5.markers <- FindMarkers(dura26_03, ident.1 = c(1,5), min.pct = 0.25, test.use = "wilcox")
d26_cluster1and5.markers <- d26_cluster1and5.markers[order(-d26_cluster1and5.markers$avg_log2FC),]
head(d26_cluster1and5.markers, n=10)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj
## Retnlg      0   5.398965 0.957 0.468         0
## S100a8      0   4.151343 1.000 0.973         0
## S100a9      0   4.102026 1.000 0.964         0
## Slpi        0   3.874868 0.936 0.212         0
## Wfdc21      0   3.762133 0.922 0.304         0
## Mmp8        0   3.679405 0.756 0.128         0
## Lcn2        0   3.296152 0.906 0.416         0
## Pglyrp1     0   3.271177 0.966 0.315         0
## Mmp9        0   3.241366 0.861 0.088         0
## Ifitm6      0   3.216711 0.814 0.371         0

S100a9 is marker gene of Neutrophils.

d26_cluster51.markers <- FindMarkers(dura26_03, ident.1 = 1, ident.2 = 5, min.pct = 0.25, test.use = "wilcox")
d26_cluster51.markers <- d26_cluster51.markers[order(-d26_cluster51.markers$avg_log2FC),]
head(d26_cluster51.markers, n = 10) #over-expressed in 1 to 5
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Ccl6   8.408724e-88   3.991572 0.784 0.211 1.661732e-83
## Wfdc17 8.017063e-24   3.742104 0.441 0.184 1.584332e-19
## Ifitm1 1.268280e-25   3.189098 0.420 0.144 2.506375e-21
## Srgn   4.980798e-75   2.946422 0.769 0.275 9.843052e-71
## Il1b   1.149113e-33   2.942425 0.392 0.056 2.270877e-29
## Fxyd5  3.111073e-76   2.524240 0.874 0.693 6.148102e-72
## Retnlg 3.129756e-94   2.498168 0.972 0.931 6.185023e-90
## Clec4d 2.741129e-56   2.370596 0.576 0.096 5.417020e-52
## Csf3r  6.248362e-30   2.086004 0.558 0.315 1.234801e-25
## Slpi   1.613780e-86   2.025679 0.959 0.893 3.189152e-82
d26_cluster51.markers <-
d26_cluster51.markers[order(d26_cluster51.markers$avg_log2FC),]
head(d26_cluster51.markers, n = 10) #over-expressed in 5 to 1
##                      p_val avg_log2FC pct.1 pct.2     p_val_adj
## Chil3        4.748926e-136  -3.537224 0.320 0.952 9.384829e-132
## Camp         7.727480e-124  -3.114188 0.764 0.989 1.527105e-119
## Ngp          3.296939e-124  -2.807242 0.759 0.989 6.515411e-120
## Ltf          7.586149e-127  -2.805146 0.358 0.952 1.499175e-122
## RP23-81C12.1  6.270382e-94  -1.956106 0.104 0.728  1.239153e-89
## Ly6d          5.527630e-13  -1.851277 0.094 0.261  1.092370e-08
## Cebpe         3.488081e-94  -1.723820 0.164 0.808  6.893146e-90
## Serpinb1a     2.808464e-91  -1.716097 0.231 0.877  5.550086e-87
## Cybb          6.376882e-81  -1.499656 0.320 0.909  1.260199e-76
## Orm1          4.465375e-71  -1.419817 0.012 0.437  8.824474e-67

These are all Neutrophils. Cluster 1 and 5 are Neutrophils.

Lets move to another big cluster which is 2. cluster2 is in close proximity with cluster9 and cluster3.

d26_cluster2.markers <- FindMarkers(dura26_03, ident.1 = 2, min.pct = 0.25, test.use = "wilcox")
d26_cluster2.markers <- d26_cluster2.markers[order(-d26_cluster2.markers$avg_log2FC),]
head(d26_cluster2.markers, n=10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Plac8   3.652499e-242   2.799093 0.969 0.523 7.218069e-238
## Ifitm3  1.256040e-266   2.446148 0.998 0.598 2.482187e-262
## Lyz2    2.253516e-226   2.381228 1.000 0.865 4.453398e-222
## Gngt2   1.691194e-157   2.178137 0.845 0.317 3.342137e-153
## Ms4a4c  6.019910e-263   2.170770 0.837 0.193 1.189655e-258
## Ms4a6c  6.664189e-279   2.168076 0.967 0.301 1.316977e-274
## Smpdl3a 2.919410e-267   2.157716 0.930 0.303 5.769339e-263
## Lgals3  1.571079e-235   2.152213 0.979 0.495 3.104766e-231
## Fn1      0.000000e+00   2.129027 0.618 0.039  0.000000e+00
## S100a4   0.000000e+00   2.056894 0.893 0.157  0.000000e+00

Lyz2 and Ifitm3 associated with Macrophages. But we have to consider why cluster2 is separated with cluster3

Lets see cluster9.

d26_cluster9.markers <- FindMarkers(dura26_03, ident.1 = 9, min.pct = 0.25, test.use = "wilcox")
d26_cluster9.markers <- d26_cluster9.markers[order(-d26_cluster9.markers$avg_log2FC),]
head(d26_cluster9.markers, n=10)
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## Stmn1  5.643989e-242   3.648208 0.994 0.147 1.115365e-237
## Pclaf   0.000000e+00   3.517733 0.983 0.089  0.000000e+00
## H2afx  4.229949e-177   3.281187 0.956 0.183 8.359225e-173
## Tuba1b 2.075891e-126   3.186591 0.994 0.439 4.102375e-122
## Tubb5  6.236222e-109   2.841091 1.000 0.648 1.232402e-104
## Rrm2   1.427195e-306   2.796936 0.856 0.062 2.820422e-302
## Birc5  6.653542e-297   2.694864 0.956 0.088 1.314873e-292
## Hmgb2  7.887011e-112   2.649561 1.000 0.490 1.558631e-107
## Top2a   0.000000e+00   2.608229 0.944 0.065  0.000000e+00
## H2afv  6.142610e-113   2.530038 1.000 0.456 1.213903e-108
d26_cluster97.markers <- FindMarkers(dura26_03, ident.1 = 9, ident.2 = 7, min.pct = 0.25, test.use = "wilcox")
d26_cluster97.markers <- d26_cluster97.markers[order(-d26_cluster97.markers$avg_log2FC),]
head(d26_cluster97.markers, n = 10) #over-expressed in 9 to 7
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Stmn1  3.706217e-72   3.942280 0.994 0.111 7.324226e-68
## Pclaf  3.204905e-73   3.906206 0.983 0.032 6.333533e-69
## Cd24a  5.853077e-40   3.617756 0.717 0.069 1.156685e-35
## Vpreb3 1.235775e-22   3.564054 0.494 0.083 2.442139e-18
## H2afx  1.641547e-58   3.390011 0.956 0.184 3.244025e-54
## Lyz2   1.589477e-06   3.317194 0.950 0.765 3.141124e-02
## Tuba1b 2.714304e-63   3.138599 0.994 0.539 5.364008e-59
## Ebf1   1.978800e-25   3.109762 0.489 0.041 3.910504e-21
## Rrm2   1.192536e-59   3.075818 0.856 0.028 2.356690e-55
## Hmgb2  8.432537e-64   3.017690 1.000 0.507 1.666438e-59
d26_cluster97.markers <-
d26_cluster97.markers[order(d26_cluster97.markers$avg_log2FC),]
head(d26_cluster97.markers, n = 10) #over-expressed in 7 to 9
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Ccl5   1.156187e-26  -4.647307 0.217 0.664 2.284857e-22
## Trbc2  4.744255e-64  -4.080846 0.089 0.972 9.375597e-60
## Cd3g   1.123484e-63  -3.592281 0.056 0.963 2.220228e-59
## Cd3d   9.492898e-64  -3.432042 0.056 0.968 1.875986e-59
## Cxcr6  4.969069e-32  -3.420098 0.067 0.622 9.819874e-28
## Trbc1  2.105921e-27  -3.120470 0.044 0.562 4.161721e-23
## Ms4a4b 3.275551e-54  -3.081203 0.211 0.945 6.473143e-50
## Trac   1.336437e-47  -2.925707 0.039 0.806 2.641067e-43
## Ltb    1.946658e-51  -2.886885 0.156 0.912 3.846985e-47
## Gimap4 5.685667e-48  -2.874636 0.133 0.857 1.123602e-43
d26_cluster92.markers <- FindMarkers(dura26_03, ident.1 = 2, ident.2 = 9, min.pct = 0.25, test.use = "wilcox")
d26_cluster92.markers <- d26_cluster92.markers[order(-d26_cluster92.markers$avg_log2FC),]
head(d26_cluster92.markers, n = 10) #over-expressed in 2 to 9
##                      p_val avg_log2FC pct.1 pct.2    p_val_adj
## Lst1          2.075429e-67   2.375258 0.983 0.411 4.101463e-63
## Ifitm3        2.149268e-71   2.371841 0.998 0.633 4.247383e-67
## Gngt2         1.956197e-34   2.348555 0.845 0.622 3.865837e-30
## Lyz2          1.156870e-61   2.333906 1.000 0.950 2.286207e-57
## Clec4a3       1.452146e-57   2.189732 0.878 0.244 2.869731e-53
## Ccl6          3.553361e-57   2.183748 0.957 0.456 7.022152e-53
## Sirpb1c       8.116604e-45   2.170806 0.731 0.128 1.604003e-40
## Ifitm6        2.701306e-58   2.154550 0.940 0.456 5.338320e-54
## RP24-150C17.2 9.821452e-43   2.086995 0.736 0.133 1.940915e-38
## Wfdc17        5.732199e-44   2.057861 0.897 0.450 1.132797e-39
d26_cluster92.markers <-
d26_cluster92.markers[order(d26_cluster92.markers$avg_log2FC),]
head(d26_cluster92.markers, n = 10) #over-expressed in 9 to 2
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Stmn1   1.016368e-113  -3.880868 0.155 0.994 2.008546e-109
## Vpreb3   5.761001e-39  -3.825926 0.083 0.494  1.138489e-34
## Pclaf   3.253970e-111  -3.743342 0.143 0.983 6.430495e-107
## H2afx    1.079507e-94  -3.591643 0.207 0.956  2.133321e-90
## Ebf1     1.621690e-48  -3.346604 0.041 0.489  3.204783e-44
## Cd24a    8.962038e-41  -3.303178 0.233 0.717  1.771078e-36
## Tubb5    1.045096e-87  -3.255233 0.802 1.000  2.065318e-83
## Tuba1b   7.187086e-86  -3.183015 0.703 0.994  1.420312e-81
## Rrm2    2.022936e-108  -3.083587 0.041 0.856 3.997726e-104
## Chchd10  1.220801e-48  -3.015347 0.079 0.567  2.412547e-44

Vpreb3 is B cell marker.

Compare cluster2 with cluster3

d26_cluster32.markers <- FindMarkers(dura26_03, ident.1 = 2, ident.2 = 3, min.pct = 0.25, test.use = "wilcox")
d26_cluster32.markers <- d26_cluster32.markers[order(-d26_cluster32.markers$avg_log2FC),]
head(d26_cluster32.markers, n = 10) #over-expressed in 2 to 3
##                      p_val avg_log2FC pct.1 pct.2     p_val_adj
## Plac8        2.068649e-148   3.835427 0.969 0.334 4.088065e-144
## Chil3         6.741924e-64   3.714001 0.711 0.302  1.332339e-59
## Ly6c2         1.018481e-91   3.201431 0.806 0.274  2.012721e-87
## Hp           1.335406e-111   2.827284 0.837 0.197 2.639030e-107
## Msrb1        1.181956e-137   2.591010 0.977 0.579 2.335782e-133
## RP23-458B6.6  1.858202e-62   2.481162 0.578 0.115  3.672179e-58
## Mgst1        8.405793e-101   2.324253 0.766 0.127  1.661153e-96
## Ifitm6       1.298708e-119   2.267088 0.940 0.356 2.566508e-115
## Gsr          1.765232e-113   2.220515 0.816 0.127 3.488452e-109
## Smpdl3a      2.850352e-114   2.145704 0.930 0.414 5.632866e-110

Chil3 is a marker gene of monocytic cell Ly6c2 expressed in MC, MdC, pDC, neutr.

d26_cluster32.markers <-
d26_cluster32.markers[order(d26_cluster32.markers$avg_log2FC),]
head(d26_cluster32.markers, n = 10) #over-expressed in 3 to 2
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## C1qa    3.372681e-71  -4.635279 0.304 0.724  6.665091e-67
## C1qb    3.307569e-76  -4.372616 0.322 0.761  6.536418e-72
## C1qc    9.868060e-82  -4.269602 0.244 0.746  1.950126e-77
## H2-Eb1 6.342607e-117  -3.041480 0.533 0.956 1.253426e-112
## Cd81    7.504276e-88  -3.021541 0.141 0.718  1.482995e-83
## Apoe    1.376250e-14  -2.874894 0.983 0.962  2.719746e-10
## H2-Ab1 9.434704e-111  -2.723267 0.729 0.980 1.864486e-106
## H2-Aa  1.131081e-110  -2.712831 0.597 0.968 2.235243e-106
## Mgl2    5.899270e-63  -2.635574 0.056 0.523  1.165814e-58
## Cd74   5.618524e-108  -2.575299 0.899 0.990 1.110333e-103

C1q(a,b,c), Apoe, Cd74 are all marker genes of macrophages.

Move to Cluster3.

d26_cluster3.markers <- FindMarkers(dura26_03, ident.1 = 3, min.pct = 0.25, test.use = "wilcox")
d26_cluster3.markers <- d26_cluster3.markers[order(-d26_cluster3.markers$avg_log2FC),]
head(d26_cluster3.markers, n=10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## H2-Eb1  4.905959e-217   3.062899 0.956 0.539 9.695157e-213
## H2-Aa   2.405642e-213   2.885373 0.968 0.578 4.754030e-209
## H2-Ab1  8.301749e-204   2.868042 0.980 0.672 1.640592e-199
## hexb    3.562620e-175   2.536431 0.893 0.368 7.040450e-171
## Cd209a  6.460307e-119   2.510314 0.350 0.055 1.276686e-114
## H2-DMb1 1.061138e-267   2.296896 0.915 0.259 2.097021e-263
## Cd74    5.002771e-185   2.276121 0.990 0.812 9.886476e-181
## C1qb    5.810713e-124   2.268768 0.761 0.317 1.148313e-119
## Apoe     4.691199e-51   2.261069 0.962 0.822  9.270747e-47
## Ctss    1.022290e-188   2.254561 0.974 0.573 2.020249e-184

This is also seems like Macrophages. Cd74 Macrophage, H2-Aa is in MHCII-hi BAM

d26_cluster4.markers <- FindMarkers(dura26_03, ident.1 = 4, min.pct = 0.25, test.use = "wilcox")
d26_cluster4.markers <- d26_cluster4.markers[order(-d26_cluster4.markers$avg_log2FC),]
head(d26_cluster4.markers, n=10)
##               p_val avg_log2FC pct.1 pct.2     p_val_adj
## Pf4    0.000000e+00   4.377688 0.981 0.118  0.000000e+00
## Ccl8  6.156136e-281   4.271986 0.626 0.075 1.216576e-276
## C1qc   0.000000e+00   3.608880 1.000 0.264  0.000000e+00
## C1qa   0.000000e+00   3.591002 0.998 0.288  0.000000e+00
## C1qb   0.000000e+00   3.364630 0.998 0.297  0.000000e+00
## Apoe  5.918529e-234   3.327572 1.000 0.820 1.169620e-229
## Cbr2   0.000000e+00   3.272362 0.892 0.046  0.000000e+00
## Mrc1   0.000000e+00   3.127201 0.927 0.088  0.000000e+00
## Fcrls  0.000000e+00   3.103633 0.935 0.089  0.000000e+00
## Dab2   0.000000e+00   3.070949 0.937 0.094  0.000000e+00

Ccl8, C1q(a-b-c), Apoe are the genes of Macrophages.Ccl8 is the marker gene of MHCII-lo BAM.

d26_cluster43.markers <- FindMarkers(dura26_03, ident.1 = 3, ident.2 = 4, min.pct = 0.25, test.use = "wilcox")
d26_cluster43.markers <- d26_cluster43.markers[order(-d26_cluster43.markers$avg_log2FC),]
head(d26_cluster43.markers, n = 10) #over-expressed in 3 to 4
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Cd209a  1.461973e-20   2.332557 0.350 0.114 2.889150e-16
## H2-Eb1  7.211447e-78   1.947803 0.956 0.758 1.425126e-73
## H2-Ab1  1.567863e-74   1.845106 0.980 0.844 3.098411e-70
## H2-Aa   3.389368e-76   1.790676 0.968 0.788 6.698068e-72
## Cd74    4.169753e-76   1.723327 0.990 0.944 8.240266e-72
## Lsp1    2.265449e-46   1.667083 0.797 0.497 4.476980e-42
## Vim     1.065152e-32   1.611380 0.738 0.501 2.104953e-28
## Ccr2    9.081479e-60   1.595273 0.781 0.281 1.794682e-55
## Napsa   4.782497e-32   1.458145 0.531 0.197 9.451171e-28
## H2-DMb1 4.231984e-63   1.427866 0.915 0.598 8.363248e-59

H2-Aa and Cd74 are marker genes of MHCIIhi BAM.

d26_cluster43.markers <-
d26_cluster43.markers[order(d26_cluster43.markers$avg_log2FC),]
head(d26_cluster43.markers, n = 10) #over-expressed in 4 to 3
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## Ccl8    4.981980e-66  -4.280755 0.133 0.626  9.845390e-62
## Pf4    1.817344e-120  -2.891716 0.414 0.981 3.591435e-116
## Cbr2   7.488824e-120  -2.729404 0.163 0.892 1.479941e-115
## Cd209f  7.051667e-41  -2.632100 0.022 0.350  1.393550e-36
## F13a1  2.241141e-114  -2.615268 0.282 0.909 4.428943e-110
## Gas6   2.626431e-107  -2.229369 0.185 0.883 5.190354e-103
## Mrc1    3.123497e-99  -2.183073 0.447 0.927  6.172654e-95
## Ninj1   2.678256e-94  -2.175250 0.362 0.879  5.292770e-90
## Stab1   8.034978e-83  -1.994846 0.247 0.810  1.587872e-78
## Dab2    2.531454e-90  -1.952001 0.525 0.937  5.002660e-86

CCL8 marker genes of Macrophages. And from Gas6 gene we can understand that it is MHCIIlo BAM.

Move to Cluster6 and Cluster7 which are in close proximity.

d26_cluster6.markers <- FindMarkers(dura26_03, ident.1 = 6, min.pct = 0.25, test.use = "wilcox")
d26_cluster6.markers <- d26_cluster6.markers[order(-d26_cluster6.markers$avg_log2FC),]
head(d26_cluster6.markers, n=10)
##                     p_val avg_log2FC pct.1 pct.2     p_val_adj
## Gzma         0.000000e+00   6.248546 0.848 0.043  0.000000e+00
## Ccl5        4.033582e-253   4.910953 0.979 0.183 7.971164e-249
## Nkg7         0.000000e+00   4.542123 1.000 0.133  0.000000e+00
## RP23-52N2.1 1.411857e-190   4.317869 1.000 0.344 2.790112e-186
## Klra13-ps    0.000000e+00   3.920982 0.641 0.006  0.000000e+00
## Klra4        0.000000e+00   3.811391 0.460 0.006  0.000000e+00
## Ncr1         0.000000e+00   3.639103 0.882 0.012  0.000000e+00
## Klre1        0.000000e+00   3.535571 0.928 0.025  0.000000e+00
## Klrd1        0.000000e+00   3.485510 0.954 0.102  0.000000e+00
## Xcl1        7.108397e-210   3.440115 0.502 0.035 1.404761e-205

Gzma is marker gene of NK cells.

d26_cluster7.markers <- FindMarkers(dura26_03, ident.1 = 7, min.pct = 0.25, test.use = "wilcox")
d26_cluster7.markers <- d26_cluster7.markers[order(-d26_cluster7.markers$avg_log2FC),]
head(d26_cluster7.markers, n=10)
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## Trbc2   0.000000e+00   4.150907 0.972 0.075  0.000000e+00
## Cd3g    0.000000e+00   3.520112 0.963 0.044  0.000000e+00
## Cd3d    0.000000e+00   3.496582 0.968 0.061  0.000000e+00
## Trac    0.000000e+00   3.232137 0.806 0.029  0.000000e+00
## Ms4a4b 1.092076e-256   3.163835 0.945 0.128 2.158161e-252
## Trbc1  1.085831e-138   3.074764 0.562 0.074 2.145820e-134
## Thy1    0.000000e+00   2.835768 0.853 0.064  0.000000e+00
## Cxcr6  1.121751e-217   2.809718 0.622 0.051 2.216805e-213
## Gimap4 1.160761e-242   2.707215 0.857 0.102 2.293896e-238
## Cd3e    0.000000e+00   2.628572 0.783 0.029  0.000000e+00
d26_cluster76.markers <- FindMarkers(dura26_03, ident.1 = 6, ident.2 = 7, min.pct = 0.25, test.use = "wilcox")
d26_cluster76.markers <- d26_cluster76.markers[order(-d26_cluster76.markers$avg_log2FC),]
head(d26_cluster76.markers, n = 10) #over-expressed in 6 to 7
##                  p_val avg_log2FC pct.1 pct.2    p_val_adj
## Gzma      1.443053e-58   4.821049 0.848 0.083 2.851761e-54
## Klra4     8.811323e-29   3.635172 0.460 0.005 1.741294e-24
## Klra13-ps 6.708297e-40   3.539491 0.641 0.051 1.325694e-35
## Ncr1      1.736375e-66   3.442492 0.882 0.032 3.431423e-62
## Tyrobp    5.902647e-69   3.403342 0.975 0.369 1.166481e-64
## Klra8     6.074497e-35   3.402618 0.540 0.009 1.200442e-30
## Fcer1g    9.707978e-71   3.027297 0.996 0.475 1.918491e-66
## Ccl4      4.972723e-10   2.934427 0.333 0.097 9.827095e-06
## Prf1      9.907889e-42   2.843825 0.743 0.161 1.957997e-37
## Gzmb      6.066860e-31   2.693750 0.641 0.120 1.198933e-26

Gzma is a marker gene of NK cell.

d26_cluster76.markers <-
d26_cluster76.markers[order(d26_cluster76.markers$avg_log2FC),]
head(d26_cluster76.markers, n = 10) #over-expressed in 7 to 6
##                 p_val avg_log2FC pct.1 pct.2    p_val_adj
## Cd3d     1.477713e-76  -3.352606 0.059 0.968 2.920256e-72
## Cd3g     2.480341e-73  -3.247311 0.076 0.963 4.901651e-69
## Trac     1.906867e-56  -3.006886 0.076 0.806 3.768351e-52
## Cd3e     3.564880e-60  -2.749139 0.008 0.783 7.044916e-56
## Cd8b1    5.341519e-19  -2.697915 0.004 0.300 1.055591e-14
## Trbc2    5.386489e-57  -2.647685 0.321 0.972 1.064478e-52
## Ly6a     6.072114e-30  -2.617081 0.030 0.493 1.199971e-25
## Ifi27l2a 8.169995e-27  -2.536060 0.110 0.581 1.614554e-22
## Cxcr6    8.114994e-27  -2.104723 0.127 0.622 1.603685e-22
## Ms4a6b   6.173657e-35  -1.973032 0.312 0.806 1.220038e-30

Cd3e is T cell marker. Also, Cxcr6 is cytotoxic T cell. Cluster7 is T/NKT cells.

d26_cluster8.markers <- FindMarkers(dura26_03, ident.1 = 8, min.pct = 0.25, test.use = "wilcox")
d26_cluster8.markers <- d26_cluster8.markers[order(-d26_cluster8.markers$avg_log2FC),]
head(d26_cluster8.markers, n=10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Hbb-bs  3.090962e-01   7.904958 0.286 0.340 1.000000e+00
## Hba-a2  3.316536e-15   7.308181 0.250 0.097 6.554138e-11
## Elane   4.304917e-18   4.111550 0.296 0.116 8.507377e-14
## Car2    2.441922e-32   3.847025 0.255 0.057 4.825727e-28
## Prtn3   1.216752e-19   3.679656 0.301 0.115 2.404544e-15
## Mpo     9.139783e-05   3.105125 0.260 0.187 1.000000e+00
## Prdx2   5.628539e-02   2.252222 0.372 0.439 1.000000e+00
## mt-Atp6 1.845278e-01   1.776633 0.429 0.493 1.000000e+00
## mt-Co3  5.145861e-03   1.698269 0.429 0.427 1.000000e+00
## mt-Co2  1.811663e-02   1.682935 0.403 0.406 1.000000e+00

cluster 8 does not show any marker genes.it is unknown.

d26_cluster10.markers <- FindMarkers(dura26_03, ident.1 = 10, min.pct = 0.25, test.use = "wilcox")
d26_cluster10.markers <- d26_cluster10.markers[order(-d26_cluster10.markers$avg_log2FC),]
head(d26_cluster10.markers, n=10)
##               p_val avg_log2FC pct.1 pct.2     p_val_adj
## Chil3 2.562662e-117   3.570029 0.994 0.357 5.064333e-113
## Camp   4.759351e-87   3.490986 1.000 0.733  9.405429e-83
## Ngp    1.442252e-84   3.412844 1.000 0.696  2.850179e-80
## Elane  6.976288e-37   3.095042 0.450 0.112  1.378654e-32
## Fcnb   0.000000e+00   2.839995 0.844 0.047  0.000000e+00
## Ltf   3.157160e-109   2.825007 0.950 0.265 6.239180e-105
## Lcn2   2.078538e-82   2.667498 0.981 0.504  4.107607e-78
## Cebpe 3.199287e-241   2.581522 1.000 0.115 6.322430e-237
## Hmgn2 6.535498e-167   2.534618 1.000 0.213 1.291545e-162
## Hmgb2  3.403026e-95   2.263334 1.000 0.492  6.725061e-91

Cluster10 seems like Neutrophils because of Camp. Ngp and Ltf genes. But if it is like why it is not clustered with cluster5? to learn this, see the difference between cluster5 and 10.

d26_cluster105.markers <- FindMarkers(dura26_03, ident.1 = 5, ident.2 = 10, min.pct = 0.25, test.use = "wilcox")
d26_cluster105.markers <- d26_cluster105.markers[order(-d26_cluster105.markers$avg_log2FC),]
head(d26_cluster105.markers, n = 10) #over-expressed in 5 to 10
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Retnlg 1.192641e-33   3.229187 0.931 0.869 2.356897e-29
## Mmp8   3.766238e-28   2.912837 0.693 0.256 7.442840e-24
## S100a6 7.436606e-33   2.490365 0.885 0.650 1.469622e-28
## Mmp9   9.793491e-37   1.940866 0.851 0.425 1.935390e-32
## Ly6d   5.217914e-02   1.874359 0.261 0.225 1.000000e+00
## Igkc   3.276580e-05   1.795783 0.763 0.875 6.475178e-01
## Ifitm6 6.209107e-42   1.588174 0.976 0.931 1.227044e-37
## Prr13  5.081639e-35   1.537458 0.912 0.894 1.004234e-30
## Ly6g   1.434673e-23   1.403993 0.901 0.756 2.835200e-19
## Slpi   1.870616e-18   1.387405 0.893 0.887 3.696711e-14

Retnlg is highly expressed in Neutrophils.

d26_cluster105.markers <-
d26_cluster105.markers[order(d26_cluster105.markers$avg_log2FC),]
head(d26_cluster105.markers, n = 10) #over-expressed in 10 to 5
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Elane  2.723581e-17  -4.273120 0.123 0.450 5.382341e-13
## Prtn3  3.762645e-22  -2.952039 0.115 0.494 7.435738e-18
## Tuba1b 1.037083e-87  -2.886406 0.147 0.994 2.049483e-83
## Tubb5  2.067919e-74  -2.719282 0.341 1.000 4.086623e-70
## Mpo    2.383020e-05  -2.613196 0.216 0.419 4.709324e-01
## Pclaf  4.691588e-89  -2.602872 0.080 0.956 9.271516e-85
## Stmn1  1.727275e-91  -2.466686 0.099 0.988 3.413440e-87
## H2afx  5.327749e-77  -2.339219 0.096 0.894 1.052870e-72
## Ptma   3.366703e-66  -2.256320 0.371 1.000 6.653278e-62
## Birc5  1.808491e-92  -2.144753 0.061 0.950 3.573939e-88
d26_cluster11.markers <- FindMarkers(dura26_03, ident.1 = 11, min.pct = 0.25, test.use = "wilcox")
d26_cluster11.markers <- d26_cluster11.markers[order(-d26_cluster11.markers$avg_log2FC),]
head(d26_cluster11.markers, n=10)
##                  p_val avg_log2FC pct.1 pct.2     p_val_adj
## Dnajc7    1.027044e-98   3.916541 0.972 0.419  2.029644e-94
## Tifa     8.490249e-121   3.600638 0.924 0.252 1.677843e-116
## Ebf1     7.128602e-137   3.445706 1.000 0.257 1.408754e-132
## Arl5c    3.431960e-110   3.270504 0.862 0.221 6.782240e-106
## Vpreb3   1.257754e-127   3.257379 1.000 0.275 2.485573e-123
## Chchd10  2.613031e-125   3.086779 0.986 0.258 5.163872e-121
## Fam53b   2.899332e-109   3.005898 0.841 0.204 5.729660e-105
## Pafah1b3 7.527848e-107   2.807621 0.924 0.267 1.487653e-102
## Xrcc6    5.721991e-136   2.745970 0.669 0.084 1.130780e-131
## Cnp       7.267228e-75   2.738251 0.903 0.392  1.436150e-70

Ebf1 mostly expressed in B cells. And Vpreb3 is marker gene of B cells. Cluster11 is B cells.

d26_cluster12.markers <- FindMarkers(dura26_03, ident.1 = 12, min.pct = 0.25, test.use = "wilcox")
d26_cluster12.markers <- d26_cluster12.markers[order(-d26_cluster12.markers$avg_log2FC),]
head(d26_cluster12.markers, n=10)
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## Cst3    4.790570e-62   3.225389 1.000 0.855  9.467125e-58
## Naaa   2.179004e-106   3.202528 0.990 0.218 4.306147e-102
## Ppt1    6.941277e-76   2.472695 0.990 0.338  1.371735e-71
## Ckb     4.438006e-91   2.375433 1.000 0.234  8.770388e-87
## Id2     3.845022e-63   2.220650 0.990 0.348  7.598533e-59
## Irf8    1.254391e-61   2.219045 1.000 0.461  2.478927e-57
## Plbd1   2.007163e-72   2.172375 0.990 0.327  3.966555e-68
## Xcr1    0.000000e+00   2.113819 0.928 0.009  0.000000e+00
## Ifi205  0.000000e+00   2.102310 0.938 0.037  0.000000e+00
## H2-Ab1  2.316533e-44   2.054271 1.000 0.697  4.577933e-40

Xcr1 is cDC markger gene. Cluster12 is cDC1.

d26_cluster13.markers <- FindMarkers(dura26_03, ident.1 = 13, min.pct = 0.25, test.use = "wilcox")
d26_cluster13.markers <- d26_cluster13.markers[order(-d26_cluster13.markers$avg_log2FC),]
head(d26_cluster13.markers, n=10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Siglech  0.000000e+00   4.403234 1.000 0.046  0.000000e+00
## Cox6a2   0.000000e+00   4.272738 0.979 0.025  0.000000e+00
## Bst2     3.213492e-77   3.634570 1.000 0.408  6.350503e-73
## Klk1     0.000000e+00   3.462484 0.742 0.005  0.000000e+00
## Irf8     8.177503e-71   3.113865 1.000 0.461  1.616038e-66
## Ctsl    2.506594e-120   3.097849 0.979 0.172 4.953531e-116
## Smim5   8.837688e-204   2.914463 0.938 0.079 1.746504e-199
## Ccr9     0.000000e+00   2.898878 0.897 0.013  0.000000e+00
## Cd7     4.596473e-185   2.826838 0.948 0.082 9.083551e-181
## Cd209d  3.084143e-293   2.735542 0.691 0.021 6.094884e-289

Siglech is the marker gene of pDC cells. Cluster13 is pDC cell type.

d26_cluster14.markers <- FindMarkers(dura26_03, ident.1 = 14, min.pct = 0.25, test.use = "wilcox")
d26_cluster14.markers <- d26_cluster14.markers[order(-d26_cluster14.markers$avg_log2FC),]
head(d26_cluster14.markers, n=10)
##                  p_val avg_log2FC pct.1 pct.2     p_val_adj
## Trdc      1.285344e-67   4.187489 0.340 0.027  2.540096e-63
## Cxcr6    3.751435e-214   3.846493 0.894 0.060 7.413587e-210
## Gata3    3.873407e-183   3.632767 0.702 0.042 7.654627e-179
## Tcrg-C1  3.317877e-295   3.314830 0.511 0.008 6.556788e-291
## Cd3g      1.213057e-41   3.074363 0.447 0.077  2.397244e-37
## Thy1     5.036615e-114   2.721973 0.766 0.085 9.953359e-110
## Cd163l1  4.196408e-264   2.706868 0.319 0.002 8.292942e-260
## Ly6a      2.130116e-42   2.485651 0.617 0.142  4.209535e-38
## Il7r      3.065847e-93   2.286416 0.660 0.078  6.058726e-89
## Tnfrsf18  2.029799e-35   2.276536 0.489 0.110  4.011289e-31

Gata3 is a marker gene of innate lymphoid cells (ILC2).

d26_cluster15.markers <- FindMarkers(dura26_03, ident.1 = 15, min.pct = 0.25, test.use = "wilcox")
d26_cluster15.markers <- d26_cluster15.markers[order(-d26_cluster15.markers$avg_log2FC),]
head(d26_cluster15.markers, n=10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Prss34   5.903465e-40   7.189637 0.404 0.044  1.166643e-35
## Mcpt8    6.527987e-98   6.783429 0.561 0.034  1.290061e-93
## Ifitm1   1.996680e-76   4.122042 0.930 0.130  3.945839e-72
## Ccl3     2.597229e-79   3.808067 0.509 0.033  5.132644e-75
## Fcer1a  1.845643e-232   3.563017 0.474 0.006 3.647359e-228
## Ccl9     1.165255e-16   3.062140 0.772 0.305  2.302777e-12
## Ccl4     3.935822e-43   3.061363 0.474 0.052  7.777971e-39
## Cd200r3 6.218088e-247   3.001152 0.544 0.008 1.228819e-242
## Gata2    0.000000e+00   2.917251 0.772 0.006  0.000000e+00
## Csrp3    0.000000e+00   2.911262 0.614 0.001  0.000000e+00

There are genes highly expressed in T cells and DC and Macrophages. Cluster 15 is unknown.

d26_cluster150.markers <- FindMarkers(dura26_03, ident.1 = 0, ident.2 = 15, min.pct = 0.25, test.use = "wilcox")
d26_cluster150.markers <- d26_cluster150.markers[order(-d26_cluster150.markers$avg_log2FC),]
head(d26_cluster150.markers, n = 10) #over-expressed in 0 to 15
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Igkc   2.106968e-33   5.802644 0.975 0.649 4.163790e-29
## Iglc1  1.683703e-24   4.884682 0.838 0.123 3.327334e-20
## Cd74   2.002270e-35   4.806830 0.997 0.737 3.956886e-31
## Ms4a1  6.972144e-34   4.729871 0.969 0.105 1.377835e-29
## Ly6d   1.705377e-35   4.622189 0.999 0.123 3.370167e-31
## Cd79b  7.207602e-36   4.573060 0.992 0.333 1.424366e-31
## Cd79a  1.128407e-35   4.494297 0.991 0.088 2.229959e-31
## Iglc2  4.720903e-22   4.486896 0.798 0.140 9.329449e-18
## Vpreb3 3.692905e-28   4.379803 0.888 0.123 7.297918e-24
## Ighm   3.409327e-36   4.372572 0.999 0.491 6.737512e-32
d26_cluster150.markers <-
d26_cluster150.markers[order(d26_cluster150.markers$avg_log2FC),]
head(d26_cluster150.markers, n = 10) #over-expressed in 15 to 0
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## Prss34  1.916669e-37  -7.208378 0.029 0.404  3.787722e-33
## Mcpt8   1.518314e-72  -6.765719 0.022 0.561  3.000492e-68
## Ifitm1 3.601474e-137  -5.846642 0.027 0.930 7.117233e-133
## Ccl6    1.238556e-29  -5.474154 0.059 0.474  2.447634e-25
## Ccl9    2.325797e-79  -4.704317 0.047 0.772  4.596240e-75
## Cpa3    4.572023e-95  -4.528715 0.012 0.596  9.035232e-91
## Cd63    5.128697e-96  -4.148299 0.053 0.877  1.013533e-91
## Ccl3    7.205980e-94  -4.023956 0.004 0.509  1.424046e-89
## Ccl4    2.273614e-73  -3.829402 0.010 0.474  4.493116e-69
## Fxyd5   1.987224e-62  -3.724396 0.135 0.947  3.927152e-58
d26_cluster15and0.markers <- FindMarkers(dura26_03, ident.1 = c(0,15), min.pct = 0.25, test.use = "wilcox")
d26_cluster15and0.markers <- d26_cluster15and0.markers[order(-d26_cluster15and0.markers$avg_log2FC),]
head(d26_cluster15and0.markers, n=10)
##           p_val avg_log2FC pct.1 pct.2 p_val_adj
## Ms4a1         0   4.595374 0.919 0.085         0
## Igkc          0   4.392564 0.956 0.684         0
## Iglc2         0   3.998702 0.760 0.124         0
## Iglc1         0   3.937940 0.797 0.121         0
## Cd79b         0   3.702319 0.954 0.253         0
## Ly6d          0   3.694616 0.948 0.236         0
## Cd79a         0   3.590097 0.939 0.125         0
## Iglc3         0   3.322806 0.887 0.124         0
## Ighm          0   3.258323 0.970 0.623         0
## Tnfrsf13c     0   3.213256 0.797 0.089         0
d26_cluster16.markers <- FindMarkers(dura26_03, ident.1 = 16, min.pct = 0.25, test.use = "wilcox")
d26_cluster16.markers <- d26_cluster16.markers[order(-d26_cluster16.markers$avg_log2FC),]
head(d26_cluster16.markers, n=10)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## Cma1      1.896279e-171   9.315898 1.000 0.044 3.747426e-167
## Tpsb2      0.000000e+00   8.578259 1.000 0.011  0.000000e+00
## Mcpt4      0.000000e+00   7.926689 0.974 0.015  0.000000e+00
## Cpa3      8.097409e-283   7.925335 0.974 0.021 1.600210e-278
## Tpsab1     0.000000e+00   4.410061 0.632 0.002  0.000000e+00
## Speer4e    0.000000e+00   4.247394 0.368 0.000  0.000000e+00
## Serpinb1a  1.334312e-23   3.869892 0.868 0.338  2.636867e-19
## Cd63       1.546763e-28   3.774565 0.947 0.362  3.056713e-24
## Fcer1a    1.892808e-173   3.728854 0.500 0.008 3.740567e-169
## Ms4a2     7.929540e-257   3.599383 0.684 0.010 1.567036e-252

Tpsb2 and Tbsb1 are marker genes of Mast Cells. Cluster 16 is Mast Cells.

d26_cluster17.markers <- FindMarkers(dura26_03, ident.1 = 17, min.pct = 0.25, test.use = "wilcox")
d26_cluster17.markers <- d26_cluster17.markers[order(-d26_cluster17.markers$avg_log2FC),]
head(d26_cluster17.markers, n=10)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## Fscn1     2.323099e-254   5.212647 0.958 0.015 4.590909e-250
## Ccl22      0.000000e+00   5.075145 0.875 0.008  0.000000e+00
## Ccr7       3.508269e-63   4.830883 1.000 0.086  6.933041e-59
## Ccl17      2.964210e-83   4.238890 0.583 0.019  5.857873e-79
## Ccl5       5.847686e-24   3.793013 0.958 0.217  1.155620e-19
## Tmem123    2.183309e-23   3.664875 1.000 0.310  4.314656e-19
## Marcksl1   6.884521e-22   3.560302 0.958 0.307  1.360519e-17
## Serpinb6b  2.173536e-82   3.433082 0.833 0.041  4.295342e-78
## Syngr2     1.278776e-19   3.404419 1.000 0.449  2.527117e-15
## Bcl2a1b    1.184773e-25   3.376094 0.875 0.178  2.341349e-21

Ccr7 is marker cells of migDC. So cluster17 is migDC.

————- Cell Types ————————– 0 Ms4a1, B cells - found in the one vs. all comparison together with other B cell markers

1 S100a9, Neutrophils - found in the one vs all comparison

2 Lyz2, intermediate monocyte/macrophage population termed monocyte-derived cells (MdCs)

3 H2-Aa, MHCII-hi BAM cells

4 Ccl8, MHCII-lo BAM cells

5 Camp, Ngp, S100a9 Neutrophils

6 Gzma, Natural Killer (NK) cells - found in the one vs all comparison

7 Cd3e, T/NKT cells

8 unknown

9 Top2A, T cells (proliferative)

10 Camp, Ngp Neutrophils

11 Vpreb3, B cells - found in the one vs all comparison

12 Xcr1, cDC1 cells - found in the one vs all comparison

13 Siglech, pDC - found in the one vs all comparison

14 Gata3, ILC2 - found in the one vs all comparison

15 unknown

16 Tpsb2, Mast Cells - found in the one vs all comparison

17 Ccr7, migDC - found in the one vs all comparison

# Feature plot - visualize feature expression in low-dimensional space
features_short <- c("Ms4a1", "Cd3e", "Siglech")
FeaturePlot(dura26_03, features = features_short)

features <- c("Ms4a1", "Cd3e", "S100a9", "Siglech", "Top2a", "Lyz2", "Ccr7", "H2-Aa", "Gata3", "Ccl8", "Camp", "Gzma", "Vpreb3", "Xcr1", "Tpsb2" )
DotPlot(dura26_03, features = features) + RotatedAxis() 

new.cluster.ids <- c("B Cells", "Neutrophils", "Monocyte", "MHCII-hi BAM", "MHCII-lo BAM", "Neutrophils",
    "NK Cells", "T/NKT Cells", "Unknown", "T Cells (proliferative)", "Neutrophils", "B Cells", "cDC1", "pDC", "ILC2", "Unknown", "Mast Cells", "migDC")
names(new.cluster.ids) <- levels(dura26_03)
dura26_03 <- RenameIdents(dura26_03, new.cluster.ids)

DimPlot(dura26_03, reduction = "tsne", label = TRUE, pt.size = 0.5) + NoLegend()